library("tidyverse")
library("knitr")
library("plotly")
Import data
eigenvectors <- readRDS("../results/2017-08-29_keratinocyte_pca_eigenvectors/eigenvectors_clusterTG.RDS")
# expression.data <- readRDS("../../data/25-08-2017_APC_E7_SCRNASEQ/keratinocyte_exp_matrix.RDS")
expression.data <- readRDS("../data/25-08-2017_APC_E7_SCRNASEQ/keratinocyte_exp_matrix.RDS")
rownames(expression.data) <- gsub("_.*", '', rownames(expression.data))
# keep.e7 <- which(rownames(expression.data) == "E7")
# expression.data <- expression.data[c(keep.e7, sample(x = seq_len(nrow(expression.data)), size = 299)), sample(x = seq_len(ncol(expression.data)), size = 200)]
Get summary statistics
expression.data %>%
colnames() %>%
strsplit("_") %>%
lapply("[", 1) %>%
unlist() -> samples
Number of cells per sample
samples %>%
table() %>%
as.data.frame() %>%
setNames(c("Samples", "Frequency"))
## Samples Frequency
## 1 1 3230
## 2 2 2632
## 3 3 1771
## 4 4 907
groups <- factor(ifelse(samples == 1 | samples == 3, "WT", "TG"))
Number of cells per cluster (“Wild type” vs. “Transgenic”)
groups %>%
table() %>%
as.data.frame() %>%
setNames(c("Cluster", "Frequency")) %>%
kable()
# Get row for E7 gene expression
e7.idx <- which(rownames(expression.data) == "E7")
# Create data frame with cell id, E7 expression, group and sample info
viral.genes <- data.frame(cell.id = names(expression.data[e7.idx,]),
e7 = expression.data[e7.idx,],
groups = factor(groups),
samples = factor(samples))
# Assign if a cell is infected or not depending on gene expression threshold of E7 gene
viral.genes %>%
mutate(status = as.factor(if_else((samples == "2" | samples == "4") & e7 > 0,
"Infected",
"Non-infected")),
cell.id = factor(cell.id)) -> infected.cells
Number of cells depending on infection status
# Get number of cells infected
infected.cells %>%
group_by(status) %>%
summarise(n = n()) %>%
kable()
| Infected |
726 |
| Non-infected |
7814 |
Number of cells depending on infection status (stratified by sample)
# Get total number of cells infected stratified by sample inf
infected.cells %>%
group_by(status, samples) %>%
summarise(n = n()) %>%
arrange(samples) %>%
kable()
| Non-infected |
1 |
3230 |
| Infected |
2 |
474 |
| Non-infected |
2 |
2158 |
| Non-infected |
3 |
1771 |
| Infected |
4 |
252 |
| Non-infected |
4 |
655 |
transgenic.exp.data <- expression.data[,colnames(expression.data) %in% infected.cells$cell.id]
e7.idx <- which(rownames(transgenic.exp.data) == "E7")
transgenic.exp.data <- transgenic.exp.data[-e7.idx,]
# Make sure all cells in new gene expression matrix are ordered according to infected.cells data frame
# all(colnames(transgenic.exp.data) == infected.cells$cell.id)
keratinocytes <- as.tibble(cbind(eigenvectors, infected.cells))
keratinocytes <- keratinocytes %>%
mutate(batch = if_else(samples == 1 | samples == 2, "Batch1", "Batch2"))
PCA plots
Sample origin
plot_ly(keratinocytes, x = ~PC1, y = ~PC2, z = ~PC3,
color = ~samples,
colors = "Set3",
marker = list(size = 2),
size = 10, replace = TRUE)
Cluster WT vs. TG
plot_ly(keratinocytes, x = ~PC1, y = ~PC2, z = ~PC3,
color = ~groups,
colors = "Set1",
marker = list(size = 2))
Detected viral transcripts
plot_ly(keratinocytes, x = ~PC1, y = ~PC2, z = ~PC3,
color = ~status,
colors = "Set2",
marker = list(size = 2))
Batch info
plot_ly(keratinocytes, x = ~PC1, y = ~PC2, z = ~PC3,
color = ~batch,
colors = "Accent",
marker = list(size = 2))